/*
 *  This file is part of the KDE libraries
 *  Copyright (C) 1999-2000 Harri Porten (porten@kde.org)
 *  Copyright (C) 2006 Apple Computer, Inc.
 *  Copyright (C) 2007 Cameron Zwarich (cwzwarich@uwaterloo.ca)
 *
 *  This library is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Library General Public
 *  License as published by the Free Software Foundation; either
 *  version 2 of the License, or (at your option) any later version.
 *
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  Library General Public License for more details.
 *
 *  You should have received a copy of the GNU Library General Public License
 *  along with this library; see the file COPYING.LIB.  If not, write to
 *  the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
 *  Boston, MA 02110-1301, USA.
 *
 */

#include "lexer.h"
#include <string.h>
#include <limits.h>

#include "dtoa.h"
#include "function.h"
#include "interpreter.h"
#include "nodes.h"
#include "commonunicode.h"
#include "wtf/ASCIICType.h"
#include "wtf/DisallowCType.h"
#include <wtf/unicode/libc/UnicodeLibC.h>

using namespace WTF;
using namespace Unicode;

// GCC cstring uses these automatically, but not all implementations do.
using std::strlen;
using std::strcpy;
using std::strncpy;
using std::memset;
using std::memcpy;

// we can't specify the namespace in yacc's C output, so do it here
using namespace KJS;

#include "grammar.h"

#include "lookup.h"
#include "lexer.lut.h"

extern YYLTYPE kjsyylloc; // global bison variable holding token info

// a bridge for yacc from the C world to C++
int kjsyylex()
{
    return lexer().lex();
}

namespace KJS
{

// Forward declaration; the definition appears further down in this file.
static bool isDecimalDigit(int c);

// Capacity reserved up front for the 8-bit and 16-bit token buffers.
static const size_t initialReadBufferCapacity = 32;
// Capacity reserved up front for the tables of allocated strings and identifiers.
static const size_t initialStringTableCapacity = 64;

// Returns the Lexer instance shared by all callers; it is created on the
// first call and never destroyed.
Lexer &lexer()
{
    // ASSERT(JSLock::currentThreadIsHoldingLock());

    // FIXME: We'd like to avoid calling new here, but we don't currently
    // support tearing down the Lexer at app quit time, since that would involve
    // tearing down its UString data members without holding the JSLock.
    static Lexer *const sharedInstance = new Lexer;
    return *sharedInstance;
}

// Builds an idle lexer with no source attached.  setCode() must be called
// before lex() can produce meaningful tokens.
Lexer::Lexer()
    : yylineno(0)
    , restrKeyword(false)
    , eatNextIdentifier(false)
    , stackToken(-1)
    , lastToken(-1)
    , pos(0)
    , code(nullptr)
    , length(0)
#ifndef KJS_PURE_ECMA
    , bol(true)
#endif
    , current(0)
    , next1(0)
    , next2(0)
    , next3(0)
{
    // These flags are otherwise only written by setCode(), lex() and
    // setDone().  Give them defined values here so that reading them on a
    // freshly constructed lexer never touches indeterminate memory.  They are
    // assigned in the body (rather than the initializer list) so the result
    // does not depend on the member declaration order.
    delimited = false;
    skipLF = false;
    skipCR = false;
    error = false;
    done = false;
    terminator = false;
    state = Start;

    // Pre-size the scratch buffers and the allocation tables.
    m_buffer8.reserveCapacity(initialReadBufferCapacity);
    m_buffer16.reserveCapacity(initialReadBufferCapacity);
    m_strings.reserveCapacity(initialStringTableCapacity);
    m_identifiers.reserveCapacity(initialStringTableCapacity);
}

// Attaches a new source buffer to the lexer and resets the scanning state.
//   sourceURL          - stored in m_sourceURL
//   startingLineNumber - initial value of the line counter
//   c, len             - the source characters and how many there are
void Lexer::setCode(const UString &sourceURL, int startingLineNumber, const KJS::UChar *c, unsigned int len)
{
    // Source description.
    m_sourceURL = sourceURL;
    yylineno = startingLineNumber;
    code = c;
    length = len;
    pos = 0;

    // Token bookkeeping.
    stackToken = -1;
    lastToken = -1;
    restrKeyword = false;
    delimited = false;
    eatNextIdentifier = false;

    // Per-scan flags.
    skipLF = false;
    skipCR = false;
    error = false;
#ifndef KJS_PURE_ECMA
    bol = true;
#endif

    // Prime the four-character lookahead window; positions beyond the end of
    // the input are represented by -1.
    const auto characterAt = [this](unsigned int index) -> int {
        return (index < length) ? code[index].uc : -1;
    };
    current = characterAt(0);
    next1 = characterAt(1);
    next2 = characterAt(2);
    next3 = characterAt(3);
}

// Advances the lookahead window by p characters.  Each step moves
// next1..next3 down by one slot and refills next3 from the source, using -1
// once the end of the input has been passed.
void Lexer::shift(unsigned int p)
{
    // Here would be a good place to strip Cf characters, but that has caused compatibility problems:
    // <http://bugs.webkit.org/show_bug.cgi?id=10183>.
    for (unsigned int remaining = p; remaining > 0; --remaining) {
        current = next1;
        next1 = next2;
        next2 = next3;
        ++pos;
        if (pos + 3 < length) {
            next3 = code[pos + 3].uc;
        } else {
            next3 = -1;
        }
    }
}

// called on each new line
// Bumps the line counter and, unless built as pure ECMA, records that the
// scanner is at the beginning of a line again.
void Lexer::nextLine()
{
#ifndef KJS_PURE_ECMA
    bol = true;
#endif
    ++yylineno;
}

// Marks the current token as finished and records the final state s.
void Lexer::setDone(State s)
{
    done = true;
    state = s;
}

int Lexer::lex()
{
    int token = 0;
    state = Start;
    unsigned short stringType = 0; // either single or double quotes
    m_buffer8.clear();
    m_buffer16.clear();
    done = false;
    terminator = false;
    skipLF = false;
    skipCR = false;

    // did we push a token on the stack previously ?
    // (after an automatic semicolon insertion)
    if (stackToken >= 0) {
        setDone(Other);
        token = stackToken;
        stackToken = 0;
    }

    while (!done) {
        if (skipLF && current != '\n') { // found \r but not \n afterwards
            skipLF = false;
        }
        if (skipCR && current != '\r') { // found \n but not \r afterwards
            skipCR = false;
        }
        if (skipLF || skipCR) { // found \r\n or \n\r -> eat the second one
            skipLF = false;
            skipCR = false;
            shift(1);
        }
        switch (state) {
        case Start:
            if (isWhiteSpace()) {
                // do nothing
            } else if (current == '/' && next1 == '/') {
                shift(1);
                state = InSingleLineComment;
            } else if (current == '/' && next1 == '*') {
                shift(1);
                state = InMultiLineComment;
            } else if (current == -1) {
                if (!terminator && !delimited) {
                    // automatic semicolon insertion if program incomplete
                    token = ';';
                    stackToken = 0;
                    setDone(Other);
                } else {
                    setDone(Eof);
                }
            } else if (isLineTerminator()) {
                nextLine();
                terminator = true;
                if (restrKeyword) {
                    token = ';';
                    setDone(Other);
                }
            } else if (current == '"' || current == '\'') {
                state = InString;
                stringType = static_cast<unsigned short>(current);
            } else if (isIdentStart(current)) {
                record16(current);
                state = InIdentifierOrKeyword;
            } else if (current == '\\') {
                state = InIdentifierStartUnicodeEscapeStart;
            } else if (current == '0') {
                record8(current);
                state = InNum0;
            } else if (isDecimalDigit(current)) {
                record8(current);
                state = InNum;
            } else if (current == '.' && isDecimalDigit(next1)) {
                record8(current);
                state = InDecimal;
#ifndef KJS_PURE_ECMA
                // <!-- marks the beginning of a line comment (for www usage)
            } else if (current == '<' && next1 == '!' &&
                       next2 == '-' && next3 == '-') {
                shift(3);
                state = InSingleLineComment;
                // same for -->
            } else if (bol && current == '-' && next1 == '-' &&  next2 == '>') {
                shift(2);
                state = InSingleLineComment;
#endif
            } else {
                token = matchPunctuator(current, next1, next2, next3);
                if (token != -1) {
                    setDone(Other);
                } else {
                    //      cerr << "encountered unknown character" << endl;
                    setDone(Bad);
                }
            }
            break;
        case InString:
            switch (current) {
            case '\'':
            case '"':
                if (current == stringType) {
                    shift(1);
                    setDone(String);
                } else {
                    record16(current);
                }
                break;
            case '\\':
                state = InEscapeSequence;
                break;
            case '\n':
            case '\r':
            case 0x2028:
            case 0x2029:
            case -1:
                // encountered newline or eof
                setDone(Bad);
                break;
            default:
                record16(current);
                break;
            }
            break;
        // Escape Sequences inside of strings
        case InEscapeSequence:
            if (isOctalDigit(current)) {
                if (current >= '0' && current <= '3' &&
                        isOctalDigit(next1) && isOctalDigit(next2)) {
                    record16(convertOctal(current, next1, next2));
                    shift(2);
                    state = InString;
                } else if (isOctalDigit(current) && isOctalDigit(next1)) {
                    record16(convertOctal('0', current, next1));
                    shift(1);
                    state = InString;
                } else if (isOctalDigit(current)) {
                    record16(convertOctal('0', '0', current));
                    state = InString;
                } else {
                    setDone(Bad);
                }
            } else if (current == 'x') {
                state = InHexEscape;
            } else if (current == 'u') {
                state = InUnicodeEscape;
            } else if (isLineTerminator()) {
                nextLine();
                state = InString;
            } else {
                record16(singleEscape(static_cast<unsigned short>(current)));
                state = InString;
            }
            break;
        case InHexEscape:
            if (isHexDigit(current) && isHexDigit(next1)) {
                state = InString;
                record16(convertHex(current, next1));
                shift(1);
            } else {
                setDone(Bad);
            }
            break;
        case InUnicodeEscape:
            if (isHexDigit(current) && isHexDigit(next1) && isHexDigit(next2) && isHexDigit(next3)) {
                record16(convertUnicode(current, next1, next2, next3));
                shift(3);
                state = InString;
            } else if (current == stringType) {
                record16('u');
                shift(1);
                setDone(String);
            } else {
                setDone(Bad);
            }
            break;
        case InSingleLineComment:
            if (isLineTerminator()) {
                nextLine();
                terminator = true;
                if (restrKeyword) {
                    token = ';';
                    setDone(Other);
                } else {
                    state = Start;
                }
            } else if (current == -1) {
                setDone(Eof);
            }
            break;
        case InMultiLineComment:
            if (current == -1) {
                setDone(Bad);
            } else if (isLineTerminator()) {
                nextLine();
            } else if (current == '*' && next1 == '/') {
                state = Start;
                shift(1);
            }
            break;
        case InIdentifierOrKeyword:
        case InIdentifier:
            if (isIdentPart(current)) {
                record16(current);
            } else if (current == '\\') {
                state = InIdentifierPartUnicodeEscapeStart;
            } else {
                setDone(state == InIdentifierOrKeyword ? IdentifierOrKeyword : Identifier);
            }
            break;
        case InNum0:
            if (current == 'x' || current == 'X') {
                m_buffer8.clear();
                state = InHex;
            } else if (current == 'b' || current == 'B') {
                m_buffer8.clear();
                state = InBinary;
            } else if (current == 'o' || current == 'O') {
                m_buffer8.clear();
                state = InOctal;
            } else if (current == '.') {
                record8(current);
                state = InDecimal;
            } else if (current == 'e' || current == 'E') {
                record8(current);
                state = InExponentIndicator;
            } else if (isOctalDigit(current)) {
                record8(current);
                state = InLegacyOctal;
            } else if (isDecimalDigit(current)) {
                record8(current);
                state = InDecimal;
            } else {
                setDone(Number);
            }
            break;
        case InHex:
            if (isHexDigit(current)) {
                record8(current);
            } else {
                setDone(Hex);
            }
            break;
        case InOctal:
            if (isOctalDigit(current)) {
                record8(current);
            } else if (isDecimalDigit(current)) {
                setDone(Bad);
            } else {
                setDone(Octal);
            }
            break;
        case InLegacyOctal:
            if (isOctalDigit(current)) {
                record8(current);
            } else if (isDecimalDigit(current)) {
                record8(current);
                state = InDecimal;
            } else {
                setDone(Octal);
            }
            break;
        case InBinary:
            if (isBinaryDigit(current)) {
                record8(current);
            } else if (isDecimalDigit(current)) {
                setDone(Bad);
            } else {
                setDone(Binary);
            }
            break;
        case InNum:
            if (isDecimalDigit(current)) {
                record8(current);
            } else if (current == '.') {
                record8(current);
                state = InDecimal;
            } else if (current == 'e' || current == 'E') {
                record8(current);
                state = InExponentIndicator;
            } else {
                setDone(Number);
            }
            break;
        case InDecimal:
            if (isDecimalDigit(current)) {
                record8(current);
            } else if (current == 'e' || current == 'E') {
                record8(current);
                state = InExponentIndicator;
            } else {
                setDone(Number);
            }
            break;
        case InExponentIndicator:
            if (current == '+' || current == '-') {
                record8(current);
            } else if (isDecimalDigit(current)) {
                record8(current);
                state = InExponent;
            } else {
                setDone(Bad);
            }
            break;
        case InExponent:
            if (isDecimalDigit(current)) {
                record8(current);
            } else {
                setDone(Number);
            }
            break;
        case InIdentifierStartUnicodeEscapeStart:
            if (current == 'u') {
                state = InIdentifierStartUnicodeEscape;
            } else {
                setDone(Bad);
            }
            break;
        case InIdentifierPartUnicodeEscapeStart:
            if (current == 'u') {
                state = InIdentifierPartUnicodeEscape;
            } else {
                setDone(Bad);
            }
            break;
        case InIdentifierStartUnicodeEscape:
            if (!isHexDigit(current) || !isHexDigit(next1) || !isHexDigit(next2) || !isHexDigit(next3)) {
                setDone(Bad);
                break;
            }
            token = convertUnicode(current, next1, next2, next3).uc;
            shift(3);
            if (!isIdentStart(token)) {
                setDone(Bad);
                break;
            }
            record16(token);
            state = InIdentifier;
            break;
        case InIdentifierPartUnicodeEscape:
            if (!isHexDigit(current) || !isHexDigit(next1) || !isHexDigit(next2) || !isHexDigit(next3)) {
                setDone(Bad);
                break;
            }
            token = convertUnicode(current, next1, next2, next3).uc;
            shift(3);
            if (!isIdentPart(token)) {
                setDone(Bad);
                break;
            }
            record16(token);
            state = InIdentifier;
            break;
        default:
            assert(!"Unhandled state in switch statement");
        }

        // move on to the next character
        if (!done) {
            shift(1);
        }
#ifndef KJS_PURE_ECMA
        if (state != Start && state != InMultiLineComment) {
            bol = false;
        }
#endif
    }

    // no identifiers allowed directly after numeric literal, e.g. "3in" is bad
    if ((state == Number || state == Octal || state == Hex || state == Binary) &&
	isIdentStart(current)) {
        state = Bad;
    }

    // terminate string
    m_buffer8.append('\0');

#ifdef KJS_DEBUG_LEX
    fprintf(stderr, "line: %d ", lineNo());
    fprintf(stderr, "yytext (%x): ", m_buffer8[0]);
    fprintf(stderr, "%s ", m_buffer8.data());
#endif

    double dval = 0;
    if (state == Number) {
        dval = kjs_strtod(m_buffer8.data(), nullptr);
    } else if (state == Hex) { // scan hex numbers
        // buffer contains "...\0" found after 0x
        if (m_buffer8.size() > 1) {
            const char *p = m_buffer8.data();
            while (char c = *p++) {
                dval *= 16;
                dval += convertHex(c);
            }
            if (dval >= mantissaOverflowLowerBound) {
                dval = parseIntOverflow(m_buffer8.data(), m_buffer8.size() - 1, 16);
            }
            state = Number;
        } else {
            // no digits seen after 0x
            state = Bad;
        }
    } else if (state == Octal) {   // scan octal number
        // buffer contains "...\0" found after 0o
        if (m_buffer8.size() > 1) {
            const char *p = m_buffer8.data();
            while (char c = *p++) {
                dval *= 8;
                dval += c - '0';
            }
            if (dval >= mantissaOverflowLowerBound) {
                dval = parseIntOverflow(m_buffer8.data(), m_buffer8.size() - 1, 8);
            }
            state = Number;
        } else {
            // no octal digits after 0o
            state = Bad;
        }
    } else if (state == Binary) { // scan binary numbers
        // buffer contains the binary digits after "0b". E.g. "1010\0"
        if (m_buffer8.size () > 1) {
            const char *p = m_buffer8.data();
            while (char c = *p++) {
                dval *= 2;
                dval += convertHex(c);
            }
            if (dval >= mantissaOverflowLowerBound) {
                dval = parseIntOverflow(m_buffer8.data() + 2, p - (m_buffer8.data() + 3), 2);
            }
            state = Number;
        } else {
            state = Bad;
        }
    }

#ifdef KJS_DEBUG_LEX
    switch (state) {
    case Eof:
        printf("(EOF)\n");
        break;
    case Other:
        printf("(Other)\n");
        break;
    case Identifier:
        printf("(Identifier)/(Keyword)\n");
        break;
    case String:
        printf("(String)\n");
        break;
    case Number:
        printf("(Number)\n");
        break;
    default:
        printf("(unknown)");
    }
#endif

    if (state != Identifier && eatNextIdentifier) {
        eatNextIdentifier = false;
    }

    restrKeyword = false;
    delimited = false;
    kjsyylloc.first_line = yylineno; // ???
    kjsyylloc.last_line = yylineno;

    switch (state) {
    case Eof:
        token = 0;
        break;
    case Other:
        if (token == '}' || token == ';') {
            delimited = true;
        }
        break;
    case IdentifierOrKeyword:
        if ((token = Lookup::find(&mainTable, m_buffer16.data(), m_buffer16.size())) < 0) {
        case Identifier:
            // Lookup for keyword failed, means this is an identifier
            // Apply anonymous-function hack below (eat the identifier)
            if (eatNextIdentifier) {
                eatNextIdentifier = false;
                token = lex();
                break;
            }
            kjsyylval.ident = makeIdentifier(m_buffer16);
            token = IDENT;
            break;
        }

        eatNextIdentifier = false;
        // Hack for "f = function somename() { ... }", too hard to get into the grammar
        if (token == FUNCTION && lastToken == '=') {
            eatNextIdentifier = true;
        }

        if (token == CONTINUE || token == BREAK ||
                token == RETURN || token == THROW) {
            restrKeyword = true;
        }
        break;
    case String:
        kjsyylval.ustr = makeUString(m_buffer16);
        token = STRING;
        break;
    case Number:
        kjsyylval.dval = dval;
        token = NUMBER;
        break;
    case Bad:
#ifdef KJS_DEBUG_LEX
        fprintf(stderr, "KJS: yylex: ERROR.\n");
#endif
        error = true;
        return -1;
    default:
        assert(!"unhandled numeration value in switch");
        error = true;
        return -1;
    }
    lastToken = token;
    return token;
}

// Reports whether the current input character counts as whitespace, as
// decided by CommonUnicode::isWhiteSpace().
bool Lexer::isWhiteSpace() const
{
    return CommonUnicode::isWhiteSpace(current);
}

// Reports whether the current character is a line terminator (CR, LF,
// U+2028 or U+2029).  As a side effect, a CR arms skipLF and an LF arms
// skipCR so that lex() can swallow the second half of a two-character
// line break.
bool Lexer::isLineTerminator()
{
    switch (current) {
    case '\r':
        skipLF = true;
        return true;
    case '\n':
        skipCR = true;
        return true;
    case 0x2028:
    case 0x2029:
        return true;
    default:
        return false;
    }
}

// Function type of the pluggable identifier-character predicates below.
typedef bool (CharacterCheck)(int c);

// Default "may start an identifier" predicate: any character in one of the
// letter categories, plus '$' and '_'.
static bool isIdentStartLibC(int c)
{
    if (category(c) & (Letter_Uppercase | Letter_Lowercase |
                       Letter_Titlecase | Letter_Modifier | Letter_Other)) {
        return true;
    }
    return c == '$' || c == '_';
}

// Default "may continue an identifier" predicate: the letter categories,
// combining marks, decimal digits and connector punctuation, plus '$' and '_'.
static bool isIdentPartLibC(int c)
{
    if (category(c) & (Letter_Uppercase | Letter_Lowercase |
                       Letter_Titlecase | Letter_Modifier | Letter_Other |
                       Mark_NonSpacing | Mark_SpacingCombining |
                       Number_DecimalDigit | Punctuation_Connector)) {
        return true;
    }
    return c == '$' || c == '_';
}

// Predicates currently used by Lexer::isIdentStart() / Lexer::isIdentPart().
// They start out as the LibC-based defaults and can be replaced through
// Lexer::setIdentStartChecker() / Lexer::setIdentPartChecker().
static CharacterCheck *identStart = ::isIdentStartLibC;
static CharacterCheck *identPart = ::isIdentPartLibC;

// Installs f as the predicate that isIdentStart() delegates to.
// f is stored as-is and is not checked for null.
void Lexer::setIdentStartChecker(bool (*f)(int c))
{
    identStart = f;
}

// Installs f as the predicate that isIdentPart() delegates to.
// f is stored as-is and is not checked for null.
void Lexer::setIdentPartChecker(bool (*f)(int c))
{
    identPart = f;
}

bool Lexer::isIdentStart(int c)
{
    return (*identStart)(c);
}

bool Lexer::isIdentPart(int c)
{
    return (*identPart)(c);
}

// Reports whether c is one of the ASCII digits '0'..'9'.
static bool isDecimalDigit(int c)
{
    if (c < '0') {
        return false;
    }
    return c <= '9';
}

// Reports whether c is an ASCII hexadecimal digit (0-9, a-f or A-F).
bool Lexer::isHexDigit(int c)
{
    if (c >= '0' && c <= '9') {
        return true;
    }
    if (c >= 'a' && c <= 'f') {
        return true;
    }
    return c >= 'A' && c <= 'F';
}

// Reports whether c is '0' or '1'.
bool Lexer::isBinaryDigit(int c)
{
    switch (c) {
    case '0':
    case '1':
        return true;
    default:
        return false;
    }
}

// Reports whether c is one of the ASCII digits '0'..'7'.
bool Lexer::isOctalDigit(int c)
{
    if (c < '0') {
        return false;
    }
    return c <= '7';
}

// Tries to recognise a punctuator starting at c1, using c2..c4 as lookahead.
// The longest matching operator wins.  On success the matched characters are
// consumed with shift() and the token code is returned (single-character
// punctuators are returned as their own character value); otherwise nothing
// is consumed and -1 is returned.
int Lexer::matchPunctuator(int c1, int c2, int c3, int c4)
{
    int matched = -1;        // token of the multi-character operator, if any
    unsigned int width = 0;  // number of characters that operator spans

    // Multi-character operators, grouped by their first character.
    switch (c1) {
    case '>':
        if (c2 == '>') {
            if (c3 == '>') {
                if (c4 == '=') {
                    matched = URSHIFTEQUAL;
                    width = 4;
                } else {
                    matched = URSHIFT;
                    width = 3;
                }
            } else if (c3 == '=') {
                matched = RSHIFTEQUAL;
                width = 3;
            } else {
                matched = RSHIFT;
                width = 2;
            }
        } else if (c2 == '=') {
            matched = GE;
            width = 2;
        }
        break;
    case '<':
        if (c2 == '<') {
            if (c3 == '=') {
                matched = LSHIFTEQUAL;
                width = 3;
            } else {
                matched = LSHIFT;
                width = 2;
            }
        } else if (c2 == '=') {
            matched = LE;
            width = 2;
        }
        break;
    case '=':
        if (c2 == '=') {
            if (c3 == '=') {
                matched = STREQ;
                width = 3;
            } else {
                matched = EQEQ;
                width = 2;
            }
        }
        break;
    case '!':
        if (c2 == '=') {
            if (c3 == '=') {
                matched = STRNEQ;
                width = 3;
            } else {
                matched = NE;
                width = 2;
            }
        }
        break;
    case '+':
        if (c2 == '+') {
            // a preceding line terminator selects the AUTO variant
            matched = terminator ? AUTOPLUSPLUS : PLUSPLUS;
            width = 2;
        } else if (c2 == '=') {
            matched = PLUSEQUAL;
            width = 2;
        }
        break;
    case '-':
        if (c2 == '-') {
            // a preceding line terminator selects the AUTO variant
            matched = terminator ? AUTOMINUSMINUS : MINUSMINUS;
            width = 2;
        } else if (c2 == '=') {
            matched = MINUSEQUAL;
            width = 2;
        }
        break;
    case '*':
        if (c2 == '*') {
            if (c3 == '=') {
                matched = EXPEQUAL;
                width = 3;
            } else {
                matched = T_EXP;
                width = 2;
            }
        } else if (c2 == '=') {
            matched = MULTEQUAL;
            width = 2;
        }
        break;
    case '/':
        if (c2 == '=') {
            matched = DIVEQUAL;
            width = 2;
        }
        break;
    case '&':
        if (c2 == '=') {
            matched = ANDEQUAL;
            width = 2;
        } else if (c2 == '&') {
            matched = AND;
            width = 2;
        }
        break;
    case '^':
        if (c2 == '=') {
            matched = XOREQUAL;
            width = 2;
        }
        break;
    case '%':
        if (c2 == '=') {
            matched = MODEQUAL;
            width = 2;
        }
        break;
    case '|':
        if (c2 == '=') {
            matched = OREQUAL;
            width = 2;
        } else if (c2 == '|') {
            matched = OR;
            width = 2;
        }
        break;
    default:
        break;
    }

    if (width != 0) {
        shift(width);
        return matched;
    }

    // Single-character punctuators are their own token value.
    switch (c1) {
    case '=': case '>': case '<': case ',':
    case '!': case '~': case '?': case ':':
    case '.': case '+': case '-': case '*':
    case '/': case '&': case '|': case '^':
    case '%': case '(': case ')': case '{':
    case '}': case '[': case ']': case ';':
        shift(1);
        return static_cast<int>(c1);
    default:
        return -1;
    }
}

// Maps the character following a backslash in a string literal to the
// character it stands for.  Characters without a special meaning are
// returned unchanged.
unsigned short Lexer::singleEscape(unsigned short c)
{
    static const struct {
        unsigned short escape;
        unsigned short value;
    } escapes[] = {
        { 'b',  0x08 },
        { 't',  0x09 },
        { 'n',  0x0A },
        { 'v',  0x0B },
        { 'f',  0x0C },
        { 'r',  0x0D },
        { '"',  0x22 },
        { '\'', 0x27 },
        { '\\', 0x5C },
    };

    for (const auto &entry : escapes) {
        if (entry.escape == c) {
            return entry.value;
        }
    }
    return c;
}

// Combines three octal digit characters (most significant first) into the
// value they denote.
unsigned short Lexer::convertOctal(int c1, int c2, int c3)
{
    const int highPart = (c1 - '0') * 64;
    const int middlePart = (c2 - '0') * 8;
    const int value = highPart + middlePart + c3 - '0';
    return static_cast<unsigned short>(value);
}

// Converts a single hexadecimal digit character to its numeric value.
// Anything that is neither 0-9 nor a-f is treated as an upper-case digit;
// callers are expected to have validated c with isHexDigit().
unsigned char Lexer::convertHex(int c)
{
    int value;
    if (c >= '0' && c <= '9') {
        value = c - '0';
    } else if (c >= 'a' && c <= 'f') {
        value = c - 'a' + 10;
    } else {
        value = c - 'A' + 10;
    }
    return static_cast<unsigned char>(value);
}

// Converts two hexadecimal digit characters (high nibble first) to the byte
// they denote.
unsigned char Lexer::convertHex(int c1, int c2)
{
    const int highNibble = convertHex(c1);
    const int lowNibble = convertHex(c2);
    return static_cast<unsigned char>((highNibble << 4) + lowNibble);
}

// Builds a UChar from four hexadecimal digit characters: c1/c2 form the
// first constructor argument and c3/c4 the second.
KJS::UChar Lexer::convertUnicode(int c1, int c2, int c3, int c4)
{
    const int firstByte = (convertHex(c1) << 4) + convertHex(c2);
    const int secondByte = (convertHex(c3) << 4) + convertHex(c4);
    return KJS::UChar(firstByte, secondByte);
}

// Appends c to the 8-bit buffer.  c must fit in a single byte.
void Lexer::record8(int c)
{
    ASSERT(c >= 0);
    ASSERT(c <= 0xff);
    const char narrow = static_cast<char>(c);
    m_buffer8.append(narrow);
}

// Appends c to the 16-bit buffer.  c must fit in an unsigned short.
void Lexer::record16(int c)
{
    ASSERT(c >= 0);
    ASSERT(c <= USHRT_MAX);
    const unsigned short codeUnit = static_cast<unsigned short>(c);
    record16(UChar(codeUnit));
}

// Appends the character c to the 16-bit buffer.
void Lexer::record16(KJS::UChar c)
{
    m_buffer16.append(c);
}

// Scans the remainder of a regular expression literal starting at the
// current character.  On success the pattern text is stored in m_pattern and
// any trailing identifier characters in m_flags, and true is returned.
// Returns false if a line terminator or the end of input is reached before
// the closing '/'.
bool Lexer::scanRegExp()
{
    m_buffer16.clear();
    bool escaped = false;      // previous character was an unescaped backslash
    bool insideClass = false;  // currently between '[' and ']'

    for (;;) {
        if (isLineTerminator() || current == -1) {
            return false;
        }

        // An unescaped '/' outside of brackets ends the pattern.
        if (current == '/' && !escaped && !insideClass) {
            m_pattern = UString(m_buffer16);
            m_buffer16.clear();
            shift(1);
            break;
        }

        // keep track of '[' and ']'
        if (!escaped) {
            if (current == '[') {
                insideClass = true;
            } else if (current == ']') {
                insideClass = false;
            }
        }

        record16(current);
        escaped = !escaped && (current == '\\');
        shift(1);
    }

    // Everything that can be part of an identifier is taken as flags.
    for (; isIdentPart(current); shift(1)) {
        record16(current);
    }
    m_flags = UString(m_buffer16);

    return true;
}

void Lexer::clear()
{
    deleteAllValues(m_strings);
    Vector<UString *> newStrings;
    newStrings.reserveCapacity(initialStringTableCapacity);
    m_strings.swap(newStrings);
    deleteAllValues(m_identifiers);
    Vector<KJS::Identifier *> newIdentifiers;
    newIdentifiers.reserveCapacity(initialStringTableCapacity);
    m_identifiers.swap(newIdentifiers);

    Vector<char> newBuffer8;
    newBuffer8.reserveCapacity(initialReadBufferCapacity);
    m_buffer8.swap(newBuffer8);

    Vector<UChar> newBuffer16;
    newBuffer16.reserveCapacity(initialReadBufferCapacity);
    m_buffer16.swap(newBuffer16);

    m_pattern = nullptr;
    m_flags = nullptr;
    m_sourceURL = nullptr;
}

// Allocates an Identifier from the characters in buffer and records it in
// m_identifiers, from where clear() deletes it later.
Identifier *Lexer::makeIdentifier(const Vector<KJS::UChar> &buffer)
{
    KJS::Identifier *const created = new KJS::Identifier(buffer.data(), buffer.size());
    m_identifiers.append(created);
    return created;
}

// Allocates a UString from the characters in buffer and records it in
// m_strings, from where clear() deletes it later.
UString *Lexer::makeUString(const Vector<KJS::UChar> &buffer)
{
    UString *const created = new UString(buffer);
    m_strings.append(created);
    return created;
}

} // namespace KJS
